import urllib.request
import urllib.error
import re
# Index page whose HTML is scanned for article links.
url = "http://blog.csdn.net/"
# Browser-like User-Agent header attached to every request made through
# the opener below.
headers = ("User-Agent", "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:58.0) Gecko/20100101 Firefox/58.0")
opener = urllib.request.build_opener()
opener.addheaders = [headers]
# Install globally so later urllib.request.urlopen()/urlretrieve() calls
# go through this opener and send the header as well.
urllib.request.install_opener(opener)

# Read the page inside a context manager so the HTTP response is closed
# as soon as the body has been consumed. Undecodable bytes are dropped
# ("ignore") rather than raising.
with urllib.request.urlopen(url) as response:
    data = response.read().decode("utf-8", "ignore")

# Captures the part of a link that follows "http://blog.csdn.net/" and ends
# in "/" plus eight digits, immediately before a closing double quote.
pat = r'http://blog.csdn.net/.*?(.*?/\d\d\d\d\d\d\d\d)"'
alllink = re.findall(pat, data)
# De-duplicate while keeping first-occurrence order. dict.fromkeys() is
# linear, unlike sorting a set by list.index(), and yields the same order.
alllink1 = list(dict.fromkeys(alllink))
# Running count of articles that were actually saved to disk.
j = 0
# Only crawl when executed as a script; the guard does not depend on the
# loop variable, so it is tested once instead of on every iteration.
if __name__ == "__main__":
    for i, link in enumerate(alllink1):
        # Files are numbered by position in the link list, so a failed
        # download leaves a gap in the numbering.
        localpath = "D:\\第二周作业\\" + str(i) + ".html"
        thislink = "http://blog.csdn.net/" + link
        try:
            # One request per article: urlretrieve raises HTTPError itself,
            # so no separate probing urlopen() (and no unclosed response)
            # is needed before it.
            urllib.request.urlretrieve(thislink, filename=localpath)
        except urllib.error.HTTPError as e:
            # The server answered with an error status; report it and move on.
            print(e.code)
        except urllib.error.URLError as e:
            # Connection-level failure (DNS, refused, ...): skip this
            # article instead of aborting the whole crawl.
            print(e.reason)
        else:
            # Count and report only after the file has been written.
            j += 1
            print("当前文章(第"+str(j)+"篇)爬取成功")
            print(thislink)
